I’ve created 10,000 simulations of populations undergoing different levels of sexual reproduction and analyzed the index of association along with genotypic and allelic diversity metrics, including an analysis of the contribution of each locus to the genotypic diversity.
This document will perform a power analysis of \(\bar{r}_d\) by calculating Receiver Operator Characteristic (ROC) Curves. These curves are useful in assessing the power of an analysis by assessing the true positive fraction and false positive fraction against different values of \(\alpha\).
In our case, we are asking the question:
How well can \(\bar{r}_d\) detect non-random mating (clonal reproduction)?
We will be looking at factors such as sample size, percent of clonal reproduction, and variable mutation rates. To perform the analyis, the data will be split up into two sets: “Random Mating” and “Non-Random Mating”. The Random mating set will always be the data set where the sex rate is one. This will serve to assess our Type 1 (False Positive) fraction. The Non-Random mating component will each rate of sexual reproduction less than one. In this data set, that means that 9 total ROC curves will be produced for each overall comparison.
Since we simulated the populations in a way such that each parent population is one of 10 replicates spawned from 100 unique seeds across rates of sexual reproduction, we can group the populations by seed and calculate an ROC Curve for each seed.
The Area under the ROC Curve will be calcuated via the auc() function via flux.
library('zksimanalysis')
## Loading required package: tidyverse
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
## Loading required package: adegenet
## Loading required package: ade4
##
## /// adegenet 2.1.0 is loaded ////////////
##
## > overview: '?adegenet'
## > tutorials/doc/questions: 'adegenetWeb()'
## > bug reports/feature requests: adegenetIssues()
## Loading required package: poppr
## This is poppr version 2.4.1.99.2. To get started, type package?poppr
## OMP parallel support: available
##
## This version of poppr is under development.
## If you find any bugs, please report them at https://github.com/grunwaldlab/poppr/issues
## Loading required package: feather
## Loading required package: vcfR
##
## ***** *** vcfR *** *****
## This is vcfR 1.5.0
## browseVignettes('vcfR') # Documentation
## citation('vcfR') # Citation
## ***** ***** ***** *****
## Loading required package: poweRlaw
## Loading required package: flux
## Loading required package: caTools
## This is flux 0.3-0
## Loading required package: viridis
## Loading required package: viridisLite
# These are for manipulating ggplot2 graphics
library("gtable")
library("grid")
We load the data previously processed in ROC_calculation.Rmd.
system.time({
res <- load(here::here("data", "ma_ROC_data.rda"))
})
res
alpha <- seq(0, 1, by = 0.01)
old_theme <- theme_set(theme_bw(base_size = 16, base_family = "Helvetica"))
old_theme <- theme_update(axis.text.x = element_text(angle = 90, vjust = 0.5))
sexrate_theme <- theme(panel.border = element_blank()) +
theme(panel.grid.major.x = element_blank()) +
theme(panel.grid.major.y = element_line(color = "grey20")) +
theme(panel.grid.minor.y = element_blank())
sample_colors <- scale_color_viridis(end = 0.75, discrete = TRUE, direction = -1, option = "A")
sample_fill <- scale_fill_viridis(end = 0.75, discrete = TRUE, direction = -1, option = "A")
fancy_clone_correction <- . %>%
mutate(clone_correction = factor(clone_correction, labels = c("clone-corrected", "uncorrected")))
fancy_sample <- . %>%
mutate(sample = forcats::lvls_revalue(sample, paste("n =", sort(unique(sample)))))
fancy_mutation_rate <- . %>%
mutate(mutation_rate = factor(mutation_rate, labels = c("Even Mutation Rate", "Uneven Mutation Rate")))
oroc_plot <- bind_rows(roc_overall, roc_overall_ea, .id = "extra_info") %>%
mutate(extra_info = ifelse(extra_info == 1, "none", "E5")) %>%
ggplot(aes(x = `False Positive`, y = `True Positive`)) +
# geom_ribbon(aes(ymin = `True Positive` - TPsd, ymax = `True Positive` + TPsd, fill = sample),
# alpha = 0.25) +
geom_line(aes(color = sample, linetype = extra_info)) +
facet_grid(sexrate ~ mutation_rate + clone_correction, switch = "y") +
scale_y_continuous(position = "right") +
geom_abline(slope = 1, lty = 3) +
labs(list(
# title = "ROC Curves",
# subtitle = "over mutation rate and clone-correction by sex rate",
pch = "Sample\nSize",
color = "Sample\nSize",
fill = "Sample\nSize",
linetype = "Extra\nInformation"
)) +
theme(aspect.ratio = 1) +
theme(legend.position = "left") +
theme(panel.grid.minor = element_blank()) +
theme(text = element_text(size = 16)) +
theme(strip.text = element_text(size = 16, face = "bold")) +
sample_colors +
sample_fill
z <- ggplotGrob(oroc_plot)
locations <- grep("strip-t", z$layout$name)
strip <- gtable_filter(z, "strip-t", trim = FALSE)
top <- strip$layout$t[1]
l <- strip$layout$l[c(1, 3)]
r <- strip$layout$r[c(2, 4)]
mat <- matrix(vector("list", length = 6), nrow = 2)
mat[] <- list(zeroGrob())
res <- gtable_matrix("toprow", mat, unit(c(1, 0, 1), "null"), unit(c(1, 1), "null"))
zz <- res %>%
gtable_add_grob(z$grobs[[locations[1]]]$grobs[[1]], 1, 1, 1, 3) %>%
gtable_add_grob(z, ., t = top, l = l[1], b = top, r = r[1], name = c("add-strip"))
oroc_plot_mod <- gtable_add_grob(res, z$grobs[[locations[3]]]$grobs[[1]], 1, 1, 1, 3) %>%
gtable_add_grob(zz, ., t = top, l = l[2], b = top, r = r[2], name = c("add-strip"))
grid.newpage()
grid.draw(oroc_plot_mod)
# ggsave(plot = oroc_plot_mod,
# file = here::here('manuscript', 'figure', 'ROC_Curve.pdf'),
# width = 6.5, height = 10, dpi = 600)
total_oroc_plot <- bind_rows(total_roc, total_roc_ea, .id = "extra_info") %>%
mutate(extra_info = ifelse(extra_info == 1, "none", "E5")) %>%
ggplot(aes(x = `False Positive`, y = `True Positive`)) +
# geom_ribbon(aes(ymin = `True Positive` - TPsd, ymax = `True Positive` + TPsd, fill = sample),
# alpha = 0.25) +
geom_line(aes(color = sample, linetype = extra_info)) +
facet_grid(mutation_rate ~ clone_correction) +
geom_abline(slope = 1, lty = 3) +
labs(list(
title = "ROC Curves",
pch = "Sample Size",
color = "Sample Size",
fill = "Sample Size",
linetype = "Extra Inforamtion"
)) +
theme(aspect.ratio = 1) +
theme(legend.position = "top") +
theme(text = element_text(size = 16)) +
sample_colors +
sample_fill
total_oroc_plot
Now we can visualize the data as facetted boxplots
# Add an annotation indicating that 0.5 is random assignment.
text_annotation <- data.frame(x = 0, y = 0.5, text = "0.5 = random", #\n Whole Data Set",
sample = "n = 10", mutation_rate = "Even Mutation Rate")
# point_annotation <- data.frame(x = 0.5, y = 0.125, sample = "n = 10",
# mutation_rate = "Even Mutation Rate")
# AURCO <- AURC_overall %>% fancy_clone_correction %>% fancy_mutation_rate
AURC_box <- . %>%
fancy_clone_correction %>%
mutate(clone_correction = forcats::fct_rev(clone_correction)) %>%
fancy_mutation_rate %>%
{
ggplot(., aes(x = factor(sexrate), y = AURC)) +
geom_boxplot(aes(fill = clone_correction)) +
annotate("rect", xmin = 0, xmax = 9.75, ymin = 0, ymax = 0.5, alpha = .2) +
geom_boxplot(aes(fill = clone_correction), outlier.size = 0.5) +
facet_grid(sample~mutation_rate, switch = "y") +
scale_y_continuous(position = "right") +
geom_text(aes(x = x, y = y, label = text), hjust = -0.1, vjust = 1.5, color = "gray25",
data = text_annotation) +
# geom_point(aes(x = x, y = y), color = "black", fill = "white",
# pch = 21, data = point_annotation) +
# geom_point(aes(x = factor(sexrate), y = AURC, pch = clone_correction), data = AURCO,
# position = position_dodge(width = 0.75), color = "black", fill = "white") +
# scale_shape_manual(values = c(21, 24)) +
labs(list(
fill = "Clone Correction",
x = "Rate of Sexual Reproduction"
# title = expression(paste("Area Under the ROC Curve for ", bar(r)[d])),
# subtitle = "calculated over sample size and mutation rate"
)) +
scale_fill_grey(start = 1, end = 0.5) +
theme(legend.position = "bottom") +
theme(strip.text = element_text(size = 16, face = "bold")) +
theme(strip.background = element_blank()) +
theme(text = element_text(size = 16)) +
sexrate_theme
}
AURC_box_plot <- AURC_by_seed %>% AURC_box
AURC_box_plot_ea <- AURC_by_seed_ea %>% AURC_box
# ggsave(plot = AURC_box_plot, width = 6, height = 7, dpi = 600,
# file = here::here('manuscript', 'figure', 'AURC_box_plot.pdf'))
# ggsave(plot = AURC_box_plot_ea, width = 6, height = 7, dpi = 600,
# file = here::here('manuscript', 'figure', 'AURC_box_plot_ea.pdf'))
AURC_box_plot
AURC_box_plot_ea
There is a lot of data here, and it’s clear that there is a difference between clone-corrected data and uncorrected data, but to be sure of that, we’ll do ANOVA tests between each pair of clone-corrected and un-corrected data, grouped by sex rate, sample size, and mutation rate.
AURC_by_seed %>%
group_by(sexrate, mutation_rate, sample) %>%
do(aov(AURC ~ clone_correction, data = ., contrasts = c("contr.helmert")) %>% broom::tidy()) %>%
filter(term != "Residuals") %>%
select(p.value) %>%
ungroup() %>%
mutate(mutation_rate = factor(mutation_rate, labels = c("Even Mutation Rate", "Uneven Mutation Rate"))) %>%
ggplot(aes(x = sexrate, y = sample, fill = p.value)) +
geom_tile() +
geom_text(aes(label = round(p.value, 2)), color = "grey50") +
scale_fill_viridis(direction = -1, begin = 0, end = 1, guide = "legend",
breaks = c(0, 0.01, 0.05, 0.1, 1)) +
facet_wrap(~mutation_rate, nrow = 1) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_discrete(expand = c(0, 0)) +
theme(aspect.ratio = 4/9) +
theme(legend.position = "bottom") +
theme(strip.background = element_blank()) +
theme(strip.text = element_text(size = 16, face = "bold")) +
# theme(panel.spacing = unit(0, "cm")) +
theme(axis.title.x = element_blank()) +
theme(axis.text.x = element_blank()) +
theme(axis.ticks.x = element_blank()) +
theme(text = element_text(size = 16)) +
theme(plot.margin = unit(c(1, 1, 0, 1), "lines")) +
labs(list(
# title = "ANOVA over Clone Correction",
x = "Rate of Sexual Reproduction",
y = "Sample Size",
fill = "p-value (Clone Correction)"
)) -> cc_anova
## Adding missing grouping variables: `sexrate`, `mutation_rate`, `sample`
AURC_by_seed %>%
group_by(sexrate, mutation_rate, clone_correction) %>%
do(aov(AURC ~ sample, data = ., contrasts = c("contr.helmert")) %>% broom::tidy()) %>%
filter(term != "Residuals") %>%
select(p.value) %>%
ggplot(aes(x = sexrate, y = clone_correction, fill = p.value)) +
geom_tile() +
geom_text(aes(label = round(p.value, 2)), color = "grey50") +
scale_fill_viridis(direction = -1, begin = 0, end = 1, guide = "legend",
breaks = c(0, 0.01, 0.05, 0.1, 1), option = "B") +
facet_wrap(~mutation_rate, nrow = 1) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_discrete(expand = c(0, 0)) +
theme(text = element_text(size = 16)) +
theme(aspect.ratio = 2/9) +
theme(legend.position = "top") +
theme(strip.background = element_blank()) +
theme(strip.text = element_blank()) +
# theme(panel.spacing = unit(0, "cm")) +
labs(list(
# title = "ANOVA over Sample Size",
x = "Rate of Sexual Reproduction",
y = "Clone Correction",
fill = "p-value (Sample Size) "
)) -> size_anova
## Adding missing grouping variables: `sexrate`, `mutation_rate`, `clone_correction`
AURC_by_seed %>%
group_by(sexrate, sample, clone_correction) %>%
do(aov(AURC ~ mutation_rate, data = .) %>% broom::tidy()) %>%
filter(term != "Residuals") %>%
select(p.value) %>%
ggplot(aes(x = sexrate, y = sample, fill = p.value)) +
geom_tile() +
geom_text(aes(label = round(p.value, 2)), color = "grey50") +
scale_fill_viridis(direction = -1, begin = 0, end = 1, guide = "legend",
breaks = c(0, 0.01, 0.05, 0.1, 1), option = "B") +
facet_wrap(~clone_correction, nrow = 2) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_discrete(expand = c(0, 0)) +
theme(text = element_text(size = 16)) +
theme(aspect.ratio = 4/9) +
theme(legend.position = "top") +
theme(strip.text = element_text(size = 16, face = "bold")) +
theme(strip.background = element_blank()) +
# theme(panel.spacing = unit(0, "cm")) +
labs(list(
# title = "ANOVA over Sample Size",
x = "Rate of Sexual Reproduction",
y = "Sample Size",
fill = "p-value (Mutation Rate)"
)) -> mutation_anova
## Adding missing grouping variables: `sexrate`, `sample`, `clone_correction`
mutation_anova
g1 <- ggplotGrob(cc_anova)
g2 <- ggplotGrob(size_anova)
g <- rbind(g1, g2, size="first") # stack the two plots
g$widths <- unit.pmax(g1$widths, g2$widths) # use the largest widths
# center the legend vertically
# g$layout[grepl("guide", g$layout$name),c("t","b")] <- c(1,nrow(g))
grid.newpage()
grid.draw(g)
# ggsave(plot = g, file = here::here('manuscript', 'figure', 'AURC_ANOVA.pdf'),
# width = 8, height = 5, dpi = 600)
Here I’m attempting to run a full AMOVA on the AURC calculations by considering full interactions
clean_anova <- . %>%
mutate(p.value = p.adjust(p.value, "BH")) %>%
mutate(term = gsub(":", " X ", term)) %>%
mutate(term = gsub("sample", "Sample Size", term)) %>%
mutate(term = gsub("mutation_rate", "Mutation Rate", term)) %>%
mutate(term = gsub("clone_correction", "Clone Correction", term)) %>%
mutate(term = forcats::fct_inorder(factor(term))) %>%
ungroup() %>%
I()
filter_anova <- . %>%
filter(term != "Residuals") %>%
filter(term != "(Intercept)") %>%
I()
anova_heat <- . %>%
filter_anova %>%
{
ggplot(., aes(x = sexrate, y = forcats::fct_rev(term), fill = p.value)) +
geom_tile() +
geom_text(aes(label = round(p.value, 2), color = 1 - p.value)) +
scale_fill_viridis(direction = -1, begin = 0, end = 1, guide = "legend",
breaks = c(0, 0.01, 0.05, 0.1, 1)) +
theme(aspect.ratio = 7/9) +
geom_vline(xintercept = c(1:8) + 0.5) +
scale_y_discrete(expand = c(0, 0)) +
scale_x_discrete(expand = c(0, 0)) +
scale_color_gradient(low = "white", high = "black", guide = "none") +
theme(text = element_text(size = 16)) +
theme(legend.position = "top") +
labs(list(
fill = "p-value",
x = "Rate of Sexual Reproduction",
y = "ANOVA term"
))
}
anova_table <- . %>%
mutate(sexrate = ifelse(duplicated(sexrate), "", sexrate)) %>%
mutate(term = gsub("([A-Z])[a-z]+?\\s([A-Z])[a-z]+?(\\s|$)", "\\1\\2 ", term)) %>%
unclass() %>% # This step is necessary because there's unexpected behavior from tibble
as.data.frame() %>% # when passing to as.data.frame
knitr::kable(format = "pandoc", digits = 3) %>%
cat(sep = "\n")
# Type I ANOVA
AURC_ANOVA_I <- AURC_by_seed %>%
group_by(sexrate) %>%
do(aov(AURC ~ clone_correction * sample * mutation_rate, data = ., contrasts = "contr.helmert") %>% broom::tidy()) %>%
clean_anova
# Type II ANOVA
AURC_ANOVA_II <- map_df(.x = c(
"AURC ~ clone_correction * sample * mutation_rate",
"AURC ~ clone_correction * mutation_rate * sample",
"AURC ~ sample * mutation_rate * clone_correction"),
.f = ~AURC_by_seed %>%
group_by(sexrate) %>%
do(lm(as.formula(.x), data = ., contrasts = "contr.helmert") %>% anova() %>% broom::tidy()) %>%
slice(3L)
) %>%
clean_anova
# Type III ANOVA
AURC_ANOVA_III <- AURC_by_seed %>%
group_by(sexrate) %>%
do(lm(AURC ~ clone_correction * sample * mutation_rate, data = ., contrasts = "contr.helmert") %>%
car::Anova(type = 3) %>%
broom::tidy()) %>%
clean_anova
AURC_ANOVA_ea_I <- AURC_by_seed_ea %>%
group_by(sexrate) %>%
do(aov(AURC ~ clone_correction * sample * mutation_rate, data = ., contrasts = "contr.helmert") %>% broom::tidy()) %>%
clean_anova
# Type II ANOVA
AURC_ANOVA_ea_II <- map_df(.x = c(
"AURC ~ clone_correction * sample * mutation_rate",
"AURC ~ clone_correction * mutation_rate * sample",
"AURC ~ sample * mutation_rate * clone_correction"),
.f = ~AURC_by_seed_ea %>%
group_by(sexrate) %>%
do(lm(as.formula(.x), data = ., contrasts = "contr.helmert") %>% anova() %>% broom::tidy()) %>%
slice(3L)
) %>%
clean_anova
# Type III ANOVA
AURC_ANOVA_ea_III <- AURC_by_seed_ea %>%
group_by(sexrate) %>%
do(lm(AURC ~ clone_correction * sample * mutation_rate, data = ., contrasts = "contr.helmert") %>%
car::Anova(type = 3) %>%
broom::tidy()) %>%
clean_anova
ggAURC_ANOVA <- AURC_ANOVA_III %>% anova_heat
ggAURC_ANOVA_ea <- AURC_ANOVA_ea_III %>% anova_heat
# ggsave(plot = ggAURC_ANOVA, file = here::here('manuscript', 'figure', 'AURC_ANOVA.pdf'),
# width = 8, height = 4.5, dpi = 600)
# ggsave(plot = ggAURC_ANOVA_ea, file = here::here('manuscript', 'figure', 'AURC_ANOVA_ea.pdf'),
# width = 8, height = 4.5, dpi = 600)
ggAURC_ANOVA
ggAURC_ANOVA_ea
AURC_ANOVA_III %>% anova_table
## sexrate term sumsq df statistic p.value
## -------- ------------- ------- ----- ----------- --------
## 0.0000 (Intercept) 63.656 1 10770.588 0.000
## CC 0.941 1 159.132 0.000
## SS 0.121 3 6.849 0.000
## MR 0.718 1 121.519 0.000
## CC X SS 0.009 3 0.508 0.821
## CC X MR 0.184 1 31.139 0.000
## SS X MR 0.008 3 0.448 0.821
## CC X SS X MR 0.002 3 0.100 0.960
## Residuals 9.362 1584 NA NA
## 0.0001 (Intercept) 69.514 1 16381.417 0.000
## CC 0.669 1 157.731 0.000
## SS 0.087 3 6.868 0.000
## MR 0.492 1 115.834 0.000
## CC X SS 0.009 3 0.692 0.743
## CC X MR 0.155 1 36.490 0.000
## SS X MR 0.006 3 0.509 0.772
## CC X SS X MR 0.003 3 0.269 0.848
## Residuals 6.722 1584 NA NA
## 0.0005 (Intercept) 92.361 1 242355.790 0.000
## CC 0.056 1 145.973 0.000
## SS 0.081 3 70.721 0.000
## MR 0.050 1 131.889 0.000
## CC X SS 0.038 3 32.935 0.000
## CC X MR 0.025 1 65.529 0.000
## SS X MR 0.036 3 31.282 0.000
## CC X SS X MR 0.017 3 15.207 0.000
## Residuals 0.604 1584 NA NA
## 0.0010 (Intercept) 88.238 1 175614.133 0.000
## CC 0.148 1 295.033 0.000
## SS 0.207 3 137.104 0.000
## MR 0.092 1 182.716 0.000
## CC X SS 0.097 3 64.042 0.000
## CC X MR 0.045 1 90.083 0.000
## SS X MR 0.060 3 39.889 0.000
## CC X SS X MR 0.029 3 19.359 0.000
## Residuals 0.796 1584 NA NA
## 0.0050 (Intercept) 53.780 1 14843.714 0.000
## CC 0.984 1 271.454 0.000
## SS 2.596 3 238.880 0.000
## MR 0.167 1 46.105 0.000
## CC X SS 0.281 3 25.810 0.000
## CC X MR 0.042 1 11.571 0.001
## SS X MR 0.030 3 2.797 0.044
## CC X SS X MR 0.021 3 1.950 0.120
## Residuals 5.739 1584 NA NA
## 0.0100 (Intercept) 42.602 1 6274.609 0.000
## CC 0.679 1 100.035 0.000
## SS 2.603 3 127.797 0.000
## MR 0.078 1 11.548 0.001
## CC X SS 0.143 3 7.015 0.000
## CC X MR 0.007 1 1.002 0.317
## SS X MR 0.036 3 1.782 0.170
## CC X SS X MR 0.040 3 1.969 0.156
## Residuals 10.755 1584 NA NA
## 0.0500 (Intercept) 28.826 1 1961.256 0.000
## CC 0.052 1 3.549 0.120
## SS 0.356 3 8.075 0.000
## MR 0.005 1 0.330 0.905
## CC X SS 0.516 3 11.711 0.000
## CC X MR 0.000 1 0.009 0.959
## SS X MR 0.021 3 0.471 0.936
## CC X SS X MR 0.004 3 0.102 0.959
## Residuals 23.281 1584 NA NA
## 0.1000 (Intercept) 26.589 1 1597.356 0.000
## CC 0.012 1 0.740 0.912
## SS 0.043 3 0.870 0.912
## MR 0.002 1 0.101 0.992
## CC X SS 0.202 3 4.052 0.028
## CC X MR 0.000 1 0.016 0.992
## SS X MR 0.002 3 0.033 0.992
## CC X SS X MR 0.003 3 0.069 0.992
## Residuals 26.367 1584 NA NA
## 0.5000 (Intercept) 26.677 1 1557.989 0.000
## CC 0.000 1 0.007 0.999
## SS 0.017 3 0.337 0.999
## MR 0.001 1 0.048 0.999
## CC X SS 0.006 3 0.108 0.999
## CC X MR 0.000 1 0.002 0.999
## SS X MR 0.003 3 0.066 0.999
## CC X SS X MR 0.000 3 0.008 0.999
## Residuals 27.123 1584 NA NA
AURC_ANOVA_ea_III %>% anova_table
## sexrate term sumsq df statistic p.value
## -------- ------------- ------- ----- ----------- --------
## 0.0000 (Intercept) 98.377 1 285739.881 0.000
## CC 0.000 1 0.013 0.989
## SS 0.001 3 0.733 0.989
## MR 0.000 1 0.445 0.989
## CC X SS 0.000 3 0.200 0.989
## CC X MR 0.000 1 0.041 0.989
## SS X MR 0.000 3 0.160 0.989
## CC X SS X MR 0.000 3 0.041 0.989
## Residuals 0.545 1584 NA NA
## 0.0001 (Intercept) 98.168 1 284255.505 0.000
## CC 0.000 1 0.117 0.980
## SS 0.001 3 1.154 0.980
## MR 0.000 1 0.006 0.980
## CC X SS 0.000 3 0.159 0.980
## CC X MR 0.000 1 0.142 0.980
## SS X MR 0.000 3 0.123 0.980
## CC X SS X MR 0.000 3 0.062 0.980
## Residuals 0.547 1584 NA NA
## 0.0005 (Intercept) 93.044 1 218824.971 0.000
## CC 0.036 1 84.494 0.000
## SS 0.063 3 49.336 0.000
## MR 0.029 1 68.047 0.000
## CC X SS 0.024 3 18.824 0.000
## CC X MR 0.016 1 36.617 0.000
## SS X MR 0.021 3 16.241 0.000
## CC X SS X MR 0.011 3 8.434 0.000
## Residuals 0.674 1584 NA NA
## 0.0010 (Intercept) 87.928 1 147718.809 0.000
## CC 0.135 1 226.263 0.000
## SS 0.218 3 122.253 0.000
## MR 0.074 1 125.156 0.000
## CC X SS 0.088 3 49.237 0.000
## CC X MR 0.039 1 65.034 0.000
## SS X MR 0.049 3 27.484 0.000
## CC X SS X MR 0.025 3 13.976 0.000
## Residuals 0.943 1584 NA NA
## 0.0050 (Intercept) 53.341 1 13692.184 0.000
## CC 0.954 1 244.776 0.000
## SS 2.669 3 228.340 0.000
## MR 0.154 1 39.534 0.000
## CC X SS 0.272 3 23.248 0.000
## CC X MR 0.040 1 10.268 0.002
## SS X MR 0.030 3 2.559 0.061
## CC X SS X MR 0.022 3 1.855 0.135
## Residuals 6.171 1584 NA NA
## 0.0100 (Intercept) 42.896 1 6349.686 0.000
## CC 0.629 1 93.090 0.000
## SS 2.558 3 126.240 0.000
## MR 0.068 1 10.050 0.002
## CC X SS 0.150 3 7.402 0.000
## CC X MR 0.005 1 0.767 0.381
## SS X MR 0.041 3 2.037 0.122
## CC X SS X MR 0.043 3 2.122 0.122
## Residuals 10.701 1584 NA NA
## 0.0500 (Intercept) 28.832 1 1997.235 0.000
## CC 0.048 1 3.329 0.137
## SS 0.361 3 8.337 0.000
## MR 0.002 1 0.158 0.922
## CC X SS 0.520 3 11.996 0.000
## CC X MR 0.000 1 0.004 0.965
## SS X MR 0.025 3 0.577 0.922
## CC X SS X MR 0.004 3 0.092 0.965
## Residuals 22.866 1584 NA NA
## 0.1000 (Intercept) 26.781 1 1592.117 0.000
## CC 0.011 1 0.682 0.947
## SS 0.042 3 0.837 0.947
## MR 0.001 1 0.068 0.987
## CC X SS 0.206 3 4.082 0.027
## CC X MR 0.000 1 0.013 0.987
## SS X MR 0.002 3 0.046 0.987
## CC X SS X MR 0.004 3 0.072 0.987
## Residuals 26.644 1584 NA NA
## 0.5000 (Intercept) 27.332 1 1569.886 0.000
## CC 0.000 1 0.004 0.999
## SS 0.018 3 0.346 0.999
## MR 0.002 1 0.098 0.999
## CC X SS 0.006 3 0.111 0.999
## CC X MR 0.000 1 0.001 0.999
## SS X MR 0.003 3 0.051 0.999
## CC X SS X MR 0.000 3 0.008 0.999
## Residuals 27.578 1584 NA NA
The result that we see from this AMOVA supports what we see in the boxplots. Basically, clone correction, sample size, and mutation rate all have a real effect on the inference of recombination. At low levels of recombination (< 1%), There is a significant effect of the intraction between mutation rate and clone correction, but not of sample size and clone correction (with the exception of 0.05% and 0.1% sexual reproduction).
When taking into consideration \(E_{5A}\), things change. We can see a clear change in the clonal populations. The only significant effect in these popuations is that of sample size, which, when looking at the power analysis, makes sense because with low sample sizes, the false positive rate increases.
total_roc_table <- total_roc %>% filter(alpha %in% c(0.01, 0.05))
total_roc_table %>%
select(clone_correction, mutation_rate, sample, alpha, `True Positive`) %>%
spread(sample, `True Positive`) %>%
arrange(alpha, mutation_rate, clone_correction)
## # A tibble: 8 x 7
## clone_correction mutation_rate alpha `10` `25` `50`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 cc even 0.01 0.3354817 0.4190466 0.4598289
## 2 wd even 0.01 0.5002778 0.5912879 0.6468496
## 3 cc uneven 0.01 0.4329370 0.5047227 0.5489499
## 4 wd uneven 0.01 0.5179464 0.6195133 0.6695188
## 5 cc even 0.05 0.4137126 0.4933882 0.5350595
## 6 wd even 0.05 0.5593955 0.6490721 0.6916324
## 7 cc uneven 0.05 0.4958329 0.5718413 0.6168463
## 8 wd uneven 0.05 0.5800645 0.6714079 0.7126347
## # ... with 1 more variables: `100` <dbl>
ggplot(total_roc_table, aes(x = sample, y = `True Positive`, color = `False Positive`, pch = factor(alpha))) +
geom_point(size = 3, position = position_dodge(width = 0.5)) +
# geom_linerange(aes(ymax = `True Positive` + TPsd, ymin = `True Positive` - TPsd),
# position = position_dodge(width = 0.5), color = "black") +
scale_color_viridis() +
scale_y_continuous(limits = c(0, 1)) +
facet_grid(mutation_rate ~ clone_correction) +
theme(aspect.ratio = 0.5) +
theme(legend.position = "bottom") +
labs(list(
title = "Overall power to detect non-random mating",
shape = expression(alpha),
color = "False Positive Rate",
x = "Sample Size",
y = "True Positive Rate"
))
overall_roc_table <- roc_overall %>%
filter(alpha %in% c(0.01, 0.05)) %>%
fancy_clone_correction %>%
mutate(clone_correction = forcats::fct_rev(clone_correction)) %>%
fancy_mutation_rate
powerplot <- . %>% {
ggplot(., aes(y = `True Positive`, x = sexrate, color = sample, alpha = `False Positive`)) +
geom_point(size = 2) +
geom_line(aes(group = sample)) +
facet_grid(clone_correction ~ mutation_rate, switch = "y") +
scale_alpha(range = c(1, 0.5), limits = c(0, 0.03), oob = I) +
scale_y_continuous(position = "right", breaks = c(0, 0.5, 1),
minor_breaks = c(seq(0, 1, by = 0.125))) +
theme(aspect.ratio = 0.5) +
theme(strip.text = element_text(size = 12, face = "bold")) +
theme(strip.background = element_blank()) +
theme(text = element_text(size = 16)) +
theme(legend.position = "top") +
theme(legend.box = "vertical") +
theme(legend.box.just = "left") +
theme(legend.spacing = unit(0, "line")) +
sexrate_theme +
labs(list(
x = "Rate of Sexual Reproduction",
y = "True Positive Rate",
alpha = "False Positive Rate",
color = "Sample Size"
)) +
sample_colors
}
powerplot0.01 <- overall_roc_table %>% filter(alpha == 0.01) %>% powerplot
powerplot0.05 <- overall_roc_table %>% filter(alpha == 0.05) %>%
powerplot + scale_alpha(range = c(1, 0.25), limits = c(0, 0.06))
## Scale for 'alpha' is already present. Adding another scale for 'alpha',
## which will replace the existing scale.
overall_roc_table_ea <- roc_overall_ea %>%
filter(alpha %in% c(0.01, 0.05)) %>%
fancy_clone_correction %>%
mutate(clone_correction = forcats::fct_rev(clone_correction)) %>%
fancy_mutation_rate
powerplot0.01_ea <- overall_roc_table_ea %>% filter(alpha == 0.01) %>% powerplot
powerplot0.01
powerplot0.01_ea
powerplot0.05
# ggsave(plot = powerplot0.01, file = here::here('manuscript', 'figure', 'ssr_power.pdf'),
# width = 6, height = 5, dpi = 600)
# ggsave(plot = powerplot0.01_ea, file = here::here('manuscript', 'figure', 'ssr_power_ea.pdf'),
# width = 6, height = 5, dpi = 600)
options(width = 100)
devtools::session_info()
## Session info --------------------------------------------------------------------------------------
## setting value
## version R version 3.4.1 (2017-06-30)
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_US.UTF-8
## tz America/Chicago
## date 2017-08-27
## Packages ------------------------------------------------------------------------------------------
## package * version date source
## ade4 * 1.7-8 2017-08-09 cran (@1.7-8)
## adegenet * 2.1.0 2017-07-17 local
## ape 4.1 2017-02-14 CRAN (R 3.4.0)
## assertthat 0.2.0 2017-04-11 CRAN (R 3.4.0)
## backports 1.1.0 2017-05-22 CRAN (R 3.4.0)
## base * 3.4.1 2017-07-07 local
## bindr 0.1 2016-11-13 CRAN (R 3.4.0)
## bindrcpp * 0.2 2017-06-17 CRAN (R 3.4.0)
## bitops 1.0-6 2013-08-17 CRAN (R 3.4.0)
## boot 1.3-20 2017-07-30 CRAN (R 3.4.1)
## broom 0.4.2 2017-02-13 CRAN (R 3.4.0)
## car 2.1-5 2017-07-04 CRAN (R 3.4.1)
## caTools * 1.17.1 2014-09-10 CRAN (R 3.4.0)
## cellranger 1.1.0 2016-07-27 CRAN (R 3.4.0)
## cluster 2.0.6 2017-03-16 CRAN (R 3.4.0)
## coda 0.19-1 2016-12-08 CRAN (R 3.4.0)
## colorspace 1.3-2 2016-12-14 CRAN (R 3.4.0)
## compiler 3.4.1 2017-07-07 local
## datasets * 3.4.1 2017-07-07 local
## deldir 0.1-14 2017-04-22 CRAN (R 3.4.0)
## devtools 1.13.3 2017-08-02 CRAN (R 3.4.1)
## digest 0.6.12 2017-01-27 CRAN (R 3.4.0)
## dplyr * 0.7.2 2017-07-20 CRAN (R 3.4.1)
## evaluate 0.10.1 2017-06-24 CRAN (R 3.4.1)
## expm 0.999-2 2017-03-29 CRAN (R 3.4.0)
## fastmatch 1.1-0 2017-01-28 CRAN (R 3.4.0)
## feather * 0.3.1 2016-11-09 CRAN (R 3.4.0)
## flux * 0.3-0 2014-04-25 CRAN (R 3.4.0)
## forcats 0.2.0 2017-01-23 CRAN (R 3.4.0)
## foreign 0.8-69 2017-06-21 CRAN (R 3.4.0)
## gdata 2.18.0 2017-06-06 CRAN (R 3.4.0)
## ggplot2 * 2.2.1 2016-12-30 CRAN (R 3.4.0)
## glue 1.1.1 2017-06-21 CRAN (R 3.4.0)
## gmodels 2.16.2 2015-07-22 CRAN (R 3.4.0)
## graphics * 3.4.1 2017-07-07 local
## grDevices * 3.4.1 2017-07-07 local
## grid * 3.4.1 2017-07-07 local
## gridExtra 2.2.1 2016-02-29 CRAN (R 3.4.0)
## gtable * 0.2.0 2016-02-26 CRAN (R 3.4.0)
## gtools 3.5.0 2015-05-29 CRAN (R 3.4.0)
## haven 1.1.0 2017-07-09 CRAN (R 3.4.1)
## highr 0.6 2016-05-09 CRAN (R 3.4.0)
## hms 0.3 2016-11-22 CRAN (R 3.4.0)
## htmltools 0.3.6 2017-04-28 CRAN (R 3.4.0)
## httpuv 1.3.5 2017-07-04 CRAN (R 3.4.1)
## httr 1.2.1 2016-07-03 CRAN (R 3.4.0)
## igraph 1.1.2 2017-07-21 cran (@1.1.2)
## jsonlite 1.5 2017-06-01 CRAN (R 3.4.0)
## knitr 1.16 2017-05-18 CRAN (R 3.4.0)
## labeling 0.3 2014-08-23 CRAN (R 3.4.0)
## lattice 0.20-35 2017-03-25 CRAN (R 3.4.0)
## lazyeval 0.2.0 2016-06-12 CRAN (R 3.4.0)
## LearnBayes 2.15 2014-05-29 CRAN (R 3.4.0)
## lme4 1.1-13 2017-04-19 CRAN (R 3.4.0)
## lubridate 1.6.0 2016-09-13 CRAN (R 3.4.0)
## magrittr 1.5 2014-11-22 CRAN (R 3.4.0)
## MASS 7.3-47 2017-04-21 CRAN (R 3.4.0)
## Matrix 1.2-10 2017-04-28 CRAN (R 3.4.0)
## MatrixModels 0.4-1 2015-08-22 CRAN (R 3.4.0)
## memoise 1.1.0 2017-04-21 CRAN (R 3.4.0)
## methods * 3.4.1 2017-07-07 local
## mgcv 1.8-18 2017-07-28 CRAN (R 3.4.1)
## mime 0.5 2016-07-07 CRAN (R 3.4.0)
## minqa 1.2.4 2014-10-09 CRAN (R 3.4.0)
## mnormt 1.5-5 2016-10-15 CRAN (R 3.4.0)
## modelr 0.1.1 2017-07-24 CRAN (R 3.4.1)
## munsell 0.4.3 2016-02-13 CRAN (R 3.4.0)
## nlme 3.1-131 2017-02-06 CRAN (R 3.4.0)
## nloptr 1.0.4 2014-08-04 CRAN (R 3.4.0)
## nnet 7.3-12 2016-02-02 CRAN (R 3.4.0)
## parallel 3.4.1 2017-07-07 local
## pbkrtest 0.4-7 2017-03-15 CRAN (R 3.4.0)
## pegas 0.10 2017-05-03 CRAN (R 3.4.0)
## permute 0.9-4 2016-09-09 CRAN (R 3.4.0)
## phangorn 2.2.0 2017-04-03 CRAN (R 3.4.0)
## pinfsc50 1.1.0 2016-12-02 CRAN (R 3.4.0)
## pkgconfig 2.0.1 2017-03-21 CRAN (R 3.4.0)
## plyr 1.8.4 2016-06-08 CRAN (R 3.4.0)
## poppr * 2.4.1.99-2 2017-08-27 Github (grunwaldlab/poppr@cd4cba2)
## poweRlaw * 0.70.0 2016-12-22 CRAN (R 3.4.0)
## psych 1.7.5 2017-05-03 CRAN (R 3.4.0)
## purrr * 0.2.3 2017-08-02 CRAN (R 3.4.1)
## quadprog 1.5-5 2013-04-17 CRAN (R 3.4.0)
## quantreg 5.33 2017-04-18 CRAN (R 3.4.0)
## R6 2.2.2 2017-06-17 cran (@2.2.2)
## Rcpp 0.12.12 2017-07-15 cran (@0.12.12)
## readr * 1.1.1 2017-05-16 CRAN (R 3.4.0)
## readxl 1.0.0 2017-04-18 CRAN (R 3.4.0)
## reshape2 1.4.2 2016-10-22 CRAN (R 3.4.0)
## rlang 0.1.1 2017-05-18 CRAN (R 3.4.0)
## rmarkdown 1.6 2017-06-15 cran (@1.6)
## rprojroot 1.2 2017-01-16 CRAN (R 3.4.0)
## rvest 0.3.2 2016-06-17 CRAN (R 3.4.0)
## scales 0.4.1.9002 2017-08-02 Github (hadley/scales@842ad87)
## seqinr 3.4-5 2017-08-01 CRAN (R 3.4.1)
## shiny 1.0.5 2017-08-23 cran (@1.0.5)
## sp 1.2-5 2017-06-29 CRAN (R 3.4.1)
## SparseM 1.77 2017-04-23 CRAN (R 3.4.0)
## spdep 0.6-13 2017-04-25 CRAN (R 3.4.0)
## splines 3.4.1 2017-07-07 local
## stats * 3.4.1 2017-07-07 local
## stats4 3.4.1 2017-07-07 local
## stringi 1.1.5 2017-04-07 CRAN (R 3.4.0)
## stringr 1.2.0 2017-02-18 CRAN (R 3.4.0)
## tibble * 1.3.3 2017-05-28 CRAN (R 3.4.0)
## tidyr * 0.6.3 2017-05-15 CRAN (R 3.4.0)
## tidyverse * 1.1.1 2017-01-27 CRAN (R 3.4.0)
## tools 3.4.1 2017-07-07 local
## utils * 3.4.1 2017-07-07 local
## vcfR * 1.5.0 2017-05-18 cran (@1.5.0)
## vegan 2.4-4 2017-08-24 cran (@2.4-4)
## VGAM 1.0-4 2017-07-25 CRAN (R 3.4.1)
## viridis * 0.4.0 2017-03-27 CRAN (R 3.4.0)
## viridisLite * 0.2.0 2017-03-24 CRAN (R 3.4.0)
## withr 2.0.0 2017-07-28 CRAN (R 3.4.1)
## xml2 1.1.1 2017-01-24 CRAN (R 3.4.0)
## xtable 1.8-2 2016-02-05 CRAN (R 3.4.0)
## yaml 2.1.14 2016-11-12 CRAN (R 3.4.0)
## zksimanalysis * 0.9.0.9000 2017-08-26 local